library(readxl)
library(purrr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(tidyr)
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
##
## extract
## The following object is masked from 'package:purrr':
##
## set_names
Date and type of antimicrobial use per farm:
amu <- read_excel("AMU_KietStudy_Marc.xlsx")
Antimicrobial class of each antimicrobial:
classes <- read_excel("FarmvsClass.xlsx")
Resistance genes concentrations in chicken:
farms <- paste0("K", c(formatC(6:9, 1, flag = "0"), 11:26))
genes <- farms %>%
map(read_excel, path = "KietAnalysisData.xlsx") %>%
map(filter, Group.1 %in% c("Chicken-Control", "Chicken-Treatment")) %>%
map(select, -Group.1) %>%
setNames(farms)
Let’s check that the names of the columns are the same for all the farms:
tmp <- genes %>%
map_df(names) %>%
apply(1, unique)
tmp[map_int(tmp, length) > 1]
## [[1]]
## [1] "FarmID" "K09"
There is a problem with with the FarmID variable that is
called K09 is one farm. Let’s fix this. The names of the
variables seem OK in the first farm:
(correct_names <- names(genes[[1]]))
## [1] "FarmID" "Group" "Sample_Name" "SamplingDay"
## [5] "aac3-liacde" "aac6-aph2" "aac6-lb" "aac6-li"
## [9] "aac6-lla" "aadA" "aadE" "aadE-like"
## [13] "acrA" "acrB" "acrF" "aph2-lb"
## [17] "aph2-lde" "aph3-lac" "aph3-lll" "arnA"
## [21] "bacA_1" "bacA_2" "blaAMPC" "blaCMY2"
## [25] "blaCTXM" "blaDHA" "blaPSE" "blaSHV"
## [29] "blaTEM" "blaZ" "cat" "cblA"
## [33] "cepA" "cepA2" "cfr" "cfr2"
## [37] "cfxA" "cmlA1-01" "cmr" "dfrA"
## [41] "dfrF" "ermA" "ermB" "ermC"
## [45] "floR" "folA" "fosB" "fox5"
## [49] "macB" "mcr-1" "mcr-2" "mcr-3"
## [53] "mdtF" "mdtL" "mdtO" "mecA"
## [57] "mefa10" "mefA3" "mfsA" "oprD"
## [61] "oqxA" "oqxB" "qacA" "qacC"
## [65] "qacE" "qnrA" "qnrB" "qnrS"
## [69] "sat4" "spc" "strB" "sul1"
## [73] "sul2" "sul3" "tetB" "tetC-01"
## [77] "tetM" "tetO" "tetQ" "tetW"
## [81] "tolC" "vanA" "vanB" "vatA"
## [85] "TotalLog2Value"
Let’s use them as variables names for all the farms:
genes %<>% map(setNames, correct_names)
Note also that not all farms have a “Before” measurement in the control group:
tmp <- genes %>%
map(~ .x %>% filter(SamplingDay == "Before") %>% select(Group)) %>%
.[map_int(., nrow) < 2] %>%
unlist()
tmp
## K14.Group K15.Group K16.Group K17.Group K18.Group K20.Group
## "Treatment" "Treatment" "Treatment" "Treatment" "Treatment" "Treatment"
## K21.Group K22.Group K23.Group
## "Treatment" "Treatment" "Treatment"
For the farms where the “Before” measurement is missing in the control group, let’s just use the “Before” measurement of the treatment group. First, we need this function:
control_before <- function(x) {
y <- filter(x, SamplingDay == "Before")
y$Group <- "Control"
y$Sample_Name <- NA
rbind(x, y)
}
Now we can use this function to make the fix:
farms_to_fix <- tmp %>%
names() %>%
str_remove(".Group")
fixed_farms <- genes %>%
extract(farms_to_fix) %>%
map(control_before)
genes[farms_to_fix] <- fixed_farms
Names of resistance genes:
resistance_genes <- setdiff(names(genes[[1]]),
c("FarmID", "Group", "Sample_Name", "SamplingDay", "TotalLog2Value"))
The antimicrobial classes to which each resistance gene corresponds:
(classes_names <- unique(classes$Antimicrobial_Class))
## [1] "Aminoglycoside" "Multidrug" "Polymyxin" "Other"
## [5] "Beta-Lactam" "Phenicol" "Sulfonamide" "MLSB"
## [9] "Quinolone" "Tetracycline" "Glycopeptide"
cln <- classes_names[1]
ttt <- classes %>%
filter(Antimicrobial_Class == cln) %>%
pull(EvaGreen_Name)
rowSums(genes[[1]][ttt])
## [1] 71.70042 51.92157 33.99088 44.96356 40.94577 60.28016 65.06596 38.21312
## [9] 57.22011 55.10100
farm <- genes[[1]]
am_class <- classes_names[1]
sum_by_class <- function(am_class, farm) {
classes %>%
filter(Antimicrobial_Class == am_class) %>%
pull(EvaGreen_Name) %>%
extract(farm, .) %>%
rowSums()
}
add_sums_by_class <- function(farm) {
farm %>%
map_dfc(classes_names, sum_by_class, .) %>%
setNames(classes_names) %>%
bind_cols(genes[[1]], .)
}
map(genes[1:2], add_sums_by_class)
## New names:
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## $K06
## # A tibble: 10 × 96
## FarmID Group Sample_Name SamplingDay `aac3-liacde` `aac6-aph2` `aac6-lb`
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 K06 Control K06-BC Before 4.74 7.42 5.75
## 2 K06 Control K06-07C 7 1.96 5.55 2.21
## 3 K06 Control K06-14C 14 0.205 4.55 0.138
## 4 K06 Control K06-60C 60 1.46 6.01 1.69
## 5 K06 Control K06-EC End 0.856 5.57 1.08
## 6 K06 Treatment K06-B Before 3.65 6.20 5.18
## 7 K06 Treatment K06-07 7 4.00 5.85 3.93
## 8 K06 Treatment K06-14 14 1.96 5.01 0.803
## 9 K06 Treatment K06-60 60 5.36 3.07 5.93
## 10 K06 Treatment K06-E End 5.72 5.49 4.44
## # … with 89 more variables: `aac6-li` <dbl>, `aac6-lla` <dbl>, aadA <dbl>,
## # aadE <dbl>, `aadE-like` <dbl>, acrA <dbl>, acrB <dbl>, acrF <dbl>,
## # `aph2-lb` <dbl>, `aph2-lde` <dbl>, `aph3-lac` <dbl>, `aph3-lll` <dbl>,
## # arnA <dbl>, bacA_1 <dbl>, bacA_2 <dbl>, blaAMPC <dbl>, blaCMY2 <dbl>,
## # blaCTXM <dbl>, blaDHA <dbl>, blaPSE <dbl>, blaSHV <dbl>, blaTEM <dbl>,
## # blaZ <dbl>, cat <dbl>, cblA <dbl>, cepA <dbl>, cepA2 <dbl>, cfr <dbl>,
## # cfr2 <dbl>, cfxA <dbl>, `cmlA1-01` <dbl>, cmr <dbl>, dfrA <dbl>, …
##
## $K07
## # A tibble: 10 × 96
## FarmID Group Sample_Name SamplingDay `aac3-liacde` `aac6-aph2` `aac6-lb`
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 K06 Control K06-BC Before 4.74 7.42 5.75
## 2 K06 Control K06-07C 7 1.96 5.55 2.21
## 3 K06 Control K06-14C 14 0.205 4.55 0.138
## 4 K06 Control K06-60C 60 1.46 6.01 1.69
## 5 K06 Control K06-EC End 0.856 5.57 1.08
## 6 K06 Treatment K06-B Before 3.65 6.20 5.18
## 7 K06 Treatment K06-07 7 4.00 5.85 3.93
## 8 K06 Treatment K06-14 14 1.96 5.01 0.803
## 9 K06 Treatment K06-60 60 5.36 3.07 5.93
## 10 K06 Treatment K06-E End 5.72 5.49 4.44
## # … with 89 more variables: `aac6-li` <dbl>, `aac6-lla` <dbl>, aadA <dbl>,
## # aadE <dbl>, `aadE-like` <dbl>, acrA <dbl>, acrB <dbl>, acrF <dbl>,
## # `aph2-lb` <dbl>, `aph2-lde` <dbl>, `aph3-lac` <dbl>, `aph3-lll` <dbl>,
## # arnA <dbl>, bacA_1 <dbl>, bacA_2 <dbl>, blaAMPC <dbl>, blaCMY2 <dbl>,
## # blaCTXM <dbl>, blaDHA <dbl>, blaPSE <dbl>, blaSHV <dbl>, blaTEM <dbl>,
## # blaZ <dbl>, cat <dbl>, cblA <dbl>, cepA <dbl>, cepA2 <dbl>, cfr <dbl>,
## # cfr2 <dbl>, cfxA <dbl>, `cmlA1-01` <dbl>, cmr <dbl>, dfrA <dbl>, …
genes[[3]]
## # A tibble: 14 × 85
## FarmID Group Sample_Name SamplingDay `aac3-liacde` `aac6-aph2` `aac6-lb`
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 K08 Control K08-BC Before 5.36 5.38 6.51
## 2 K08 Control K08-AC After 3.22 6.12 1.57
## 3 K08 Control K08-07C 7 2.95 7.62 2.39
## 4 K08 Control K08-30C 30 1.86 6.81 0.746
## 5 K08 Control K08-60C 60 4.26 8.39 1.20
## 6 K08 Control K08-90C 90 2.05 7.14 1.39
## 7 K08 Control K08-EC End 2.56 5.63 0.272
## 8 K08 Treatment K08-B Before 3.92 3.69 4.90
## 9 K08 Treatment K08-A After 10.1 9.12 5.22
## 10 K08 Treatment K08-07 7 4.49 4.70 1.77
## 11 K08 Treatment K08-30 30 6.27 5.30 1.59
## 12 K08 Treatment K08-60 60 4.76 6.50 2.61
## 13 K08 Treatment K08-90 90 7.11 9.51 3.92
## 14 K08 Treatment K08-E End 8.64 9.36 4.69
## # … with 78 more variables: `aac6-li` <dbl>, `aac6-lla` <dbl>, aadA <dbl>,
## # aadE <dbl>, `aadE-like` <dbl>, acrA <dbl>, acrB <dbl>, acrF <dbl>,
## # `aph2-lb` <dbl>, `aph2-lde` <dbl>, `aph3-lac` <dbl>, `aph3-lll` <dbl>,
## # arnA <dbl>, bacA_1 <dbl>, bacA_2 <dbl>, blaAMPC <dbl>, blaCMY2 <dbl>,
## # blaCTXM <dbl>, blaDHA <dbl>, blaPSE <dbl>, blaSHV <dbl>, blaTEM <dbl>,
## # blaZ <dbl>, cat <dbl>, cblA <dbl>, cepA <dbl>, cepA2 <dbl>, cfr <dbl>,
## # cfr2 <dbl>, cfxA <dbl>, `cmlA1-01` <dbl>, cmr <dbl>, dfrA <dbl>, …
sort(unique(amu$AAI))
## [1] "ampicillin" "apramycin" "bromhexine"
## [4] "ceftiofur" "colistin" "dexamethasone"
## [7] "diaveridine" "doxycyline" "enrofloxacin"
## [10] "erythromycin" "florfenicol" "flumequine"
## [13] "gentamicin" "kitasamycin" "lincomycin"
## [16] "oxytetracyline" "sulfachloropyrazin" "sulfadimerazin"
## [19] "sulfadimidine" "sulfamethoxazole" "sulfaquinoxaline"
## [22] "thiamphenicol" "tiamulin" "trimethoprim"
## [25] "tylosin"
resistance_genes
## [1] "aac3-liacde" "aac6-aph2" "aac6-lb" "aac6-li" "aac6-lla"
## [6] "aadA" "aadE" "aadE-like" "acrA" "acrB"
## [11] "acrF" "aph2-lb" "aph2-lde" "aph3-lac" "aph3-lll"
## [16] "arnA" "bacA_1" "bacA_2" "blaAMPC" "blaCMY2"
## [21] "blaCTXM" "blaDHA" "blaPSE" "blaSHV" "blaTEM"
## [26] "blaZ" "cat" "cblA" "cepA" "cepA2"
## [31] "cfr" "cfr2" "cfxA" "cmlA1-01" "cmr"
## [36] "dfrA" "dfrF" "ermA" "ermB" "ermC"
## [41] "floR" "folA" "fosB" "fox5" "macB"
## [46] "mcr-1" "mcr-2" "mcr-3" "mdtF" "mdtL"
## [51] "mdtO" "mecA" "mefa10" "mefA3" "mfsA"
## [56] "oprD" "oqxA" "oqxB" "qacA" "qacC"
## [61] "qacE" "qnrA" "qnrB" "qnrS" "sat4"
## [66] "spc" "strB" "sul1" "sul2" "sul3"
## [71] "tetB" "tetC-01" "tetM" "tetO" "tetQ"
## [76] "tetW" "tolC" "vanA" "vanB" "vatA"
The time points:
genes %>%
map(pull, SamplingDay) %>%
unlist() %>%
unique()
## [1] "Before" "7" "14" "60" "End" "After" "30" "90"
Generating new time points:
genes %<>% map(mutate,
SamplingDay2 = as.integer(recode(SamplingDay,
Before = "-7",
After = "0",
End = "120"))) %>%
map(arrange, Group, SamplingDay2) # making sure that data are arranged chronologically
A customized lines() function:
lines2 <- function(...) lines(..., type = "o", lwd = 2)
A function that plots a given gene concentration as a function of time for control and treatment:
plot_gene_concentration <- function(farm, antimicrobial, text) {
farm_dataset <- genes[[farm]]
plot(farm_dataset$SamplingDay2, farm_dataset[[antimicrobial]], type = "n",
xlim = c(-8, 120), xlab = "time (days)", ylab = "gene concentration")
controls <- filter(farm_dataset, Group == "Control")
treatments <- filter(farm_dataset, Group == "Treatment")
lines2(controls$SamplingDay2, controls[[antimicrobial]], col = "blue")
lines2(treatments$SamplingDay2, treatments[[antimicrobial]], col = "red")
mtext(text)
}
Plotting aac3-liacde for control and treatment in farm K06
plot_gene_concentration(farms[1], resistance_genes[1], resistance_genes[1])
Number of columns we want:
ncols <- 3
Plotting all the genes for the first farm:
opar <- par(mfrow = c(ceiling(length(resistance_genes) / ncols), ncols),
plt = c(.13, .92, .22, .9))
walk(resistance_genes, ~ plot_gene_concentration(farms[1], .x, .x))
par(opar)
Plotting all the farms for the first gene:
opar <- par(mfrow = c(ceiling(length(farms) / ncols), ncols),
plt = c(.13, .92, .22, .9))
walk(farms, ~ plot_gene_concentration(.x, resistance_genes[1], .x))
par(opar)
Let’s now try to standardize the gene concentrations by the gene concentration before treatment. This function standardizes one experiment:
standardize_by_before_ <- function(x) {
rbind(rep(1, length(resistance_genes)),
sweep(as.matrix(x[x$SamplingDay != "Before", resistance_genes]), 2,
unlist(x[x$SamplingDay == "Before", resistance_genes]), `/`))
}
This function standardizes the control of and the treatment experiments of a farm:
standardize_by_before <- function(x) {
x[, resistance_genes] <- rbind(standardize_by_before_(filter(x, Group == "Control")),
standardize_by_before_(filter(x, Group == "Treatment")))
x
}
Let’s standardize the gene concentrations for all the farms:
genes_standardized <- map(genes, standardize_by_before)
Let’s now plot all the farms on a single plot for a given antimicrobial. Let’s first define the following function that draw the layout the plot:
plot_frame <- function(...) {
plot(..., type = "n", xlab = "time (days)", ylab = "standardized genes concentrations")
}
Now, we can start exploring various option of plotting the data, starting with this function:
plot_gene_all_farms <- function(antimicrobial) {
tmp <- genes_standardized %>%
map(extract, c("Group", "SamplingDay2", antimicrobial))
tmp2 <- bind_rows(tmp)
plot_frame(tmp2[[2]], tmp2[[3]])
tmp %>%
map(filter, Group == "Control") %>%
walk(~ lines2(.x[[2]], .x[[3]], col = "blue"))
tmp %>%
map(filter, Group == "Treatment") %>%
walk(~ lines2(.x[[2]], .x[[3]], col = "red"))
mtext(antimicrobial)
legend("topright", legend = c("treatment", "control"), col = c("red", "blue"),
lty = 1, lwd = 2, bty = "n")
}
Let’s try it on one antimicrobial:
plot_gene_all_farms(resistance_genes[1])
Let’s try an alternative visualization, using this following function for box-plots:
boxplot2 <- function(x, eps, col, ...) {
boxplot(x[[3]] ~ x[[2]], at = unique(x[[2]]) + eps, add = TRUE, axes = FALSE,
boxwex = 2.5, col = adjustcolor(col, .5), outline = FALSE, ...)
}
Here is the alternative visualization:
plot_gene_all_farms_boxplot <- function(antimicrobial) {
eps <- 1.5
tmp <- genes_standardized %>%
map(filter, SamplingDay != "Before") %>%
map(extract, c("Group", "SamplingDay2", antimicrobial))
tmp2 <- bind_rows(tmp)
plot_frame(tmp2[[2]], tmp2[[3]])
control <- map(tmp, filter, Group == "Control")
walk(control, ~ points(jitter(.x[[2]] - 2), .x[[3]], col = "blue"))
control %>%
bind_rows() %>%
boxplot2(-eps, col = "blue")
treatment <- map(tmp, filter, Group == "Treatment")
walk(treatment, ~ points(jitter(.x[[2]] + 2), .x[[3]], col = "red"))
treatment %>%
bind_rows() %>%
boxplot2(eps, col = "red")
mtext(antimicrobial)
legend("topright", legend = c("treatment", "control"), fill = c("red", "blue"), bty = "n")
}
Let’s try it:
plot_gene_all_farms_boxplot(resistance_genes[1])
Mmmm… For some reason, interquartiles ranges look a bit weird on some of these boxplots. Not sure how this is calculated but I don’t really like it.
Let’s try a violin plot instead, by using this function:
vioplot2 <- function(x, eps, color, ...) {
vioplot::vioplot(x[[3]] ~ x[[2]], at = unique(x[[2]]) + eps, add = TRUE,
axes = FALSE, fill = color, lineCol = color, border = color,
wex = 4, col = adjustcolor(color, .5), ...)
}
And the new plotting function:
plot_gene_all_farms_violin <- function(antimicrobial) {
eps <- 1.5
tmp <- genes_standardized %>%
map(filter, SamplingDay != "Before") %>%
map(extract, c("Group", "SamplingDay2", antimicrobial))
tmp2 <- bind_rows(tmp)
plot_frame(tmp2[[2]], tmp2[[3]])
control <- map(tmp, filter, Group == "Control")
walk(control, ~ points(jitter(.x[[2]] - 2), .x[[3]], col = "blue"))
control %>%
bind_rows() %>%
vioplot2(-eps, "blue")
treatment <- map(tmp, filter, Group == "Treatment")
walk(treatment, ~ points(jitter(.x[[2]] + 2), .x[[3]], col = "red"))
treatment %>%
bind_rows() %>%
vioplot2(eps, "red")
mtext(antimicrobial)
legend("topright", legend = c("treatment", "control"), fill = c("red", "blue"), bty = "n")
}
Let’s try it too:
plot_gene_all_farms_violin(resistance_genes[1])
Not sure I like it either…
Let’s plot paired treatment and control for each farm instead. For that, we need the following function:
plot_gene_all_farms_pairwise <- function(antimicrobial) {
col_val <- adjustcolor(c("blue", "red"), .5)
genes %>%
map(extract, c("SamplingDay2", "Group", antimicrobial)) %>%
map(pivot_wider, names_from = Group, values_from = 3) %>%
bind_rows() %>%
mutate(SamplingDay3 = jitter(SamplingDay2),
color = (Treatment > Control) + 1) %>%
with({
plot_frame(rep(SamplingDay3, 2), c(Control, Treatment))
points(SamplingDay3, Control, col = "blue")
points(SamplingDay3, Treatment, col = "red")
arrows(SamplingDay3, Control, SamplingDay3, Treatment, 0, col = col_val[color])
})
mtext(antimicrobial)
}
Let’s try it:
plot_gene_all_farms_pairwise(resistance_genes[1])
Let’s plot the difference between treatment and control for each farm. For that, we need the following function:
plot_differences <- function(antimicrobial) {
tmp <- genes %>%
map(extract, c("SamplingDay2", "Group", antimicrobial)) %>%
map(pivot_wider, names_from = Group, values_from = 3) %>%
map(mutate, difference = Treatment - Control)
tmp %>%
bind_rows() %$%
plot_frame(SamplingDay2, difference)
walk(tmp, ~ with(.x, lines2(SamplingDay2, difference, col = "green")))
abline(h = 0)
mtext(antimicrobial)
}
Let’s try it:
plot_differences(resistance_genes[1])
Let’s consider this function with boxplots:
plot_differences_boxplot <- function(antimicrobial) {
tmp <- genes %>%
map(extract, c("SamplingDay2", "Group", antimicrobial)) %>%
map(pivot_wider, names_from = Group, values_from = 3) %>%
map(mutate, difference = Treatment - Control) %>%
bind_rows()
with(tmp, plot(jitter(SamplingDay2), difference, col = "green",
xlab = "time (days)",
ylab = "difference in standardized genes concentrations"))
tmp %>%
select(2, 1, 3) %>%
boxplot2(0, col = "green")
abline(h = 0)
mtext(antimicrobial)
}
Let’s try it:
plot_differences_boxplot(resistance_genes[1])
The boxplot still looks weird.
Let’s look at the violin alternative:
plot_differences_violin <- function(antimicrobial) {
tmp <- genes %>%
map(extract, c("SamplingDay2", "Group", antimicrobial)) %>%
map(pivot_wider, names_from = Group, values_from = 3) %>%
map(mutate, difference = Treatment - Control) %>%
bind_rows()
with(tmp, plot(jitter(SamplingDay2), difference, col = "green",
xlab = "time (days)",
ylab = "difference in standardized genes concentrations"))
tmp %>%
select(2, 1, 3) %>%
vioplot2(0, col = "green")
abline(h = 0)
mtext(antimicrobial)
}
Let’s try it:
plot_differences_violin(resistance_genes[1])
Same comment.
Let’s consider another option.
plot_differences_quantiles <- function(antimicrobial) {
tmp <- genes %>%
map(extract, c("SamplingDay2", "Group", antimicrobial)) %>%
map(pivot_wider, names_from = Group, values_from = 3) %>%
map(mutate, difference = Treatment - Control) %>%
bind_rows()
with(tmp, plot(jitter(SamplingDay2), difference, col = "darkgrey",
xlab = "time (days)",
ylab = "difference in standardized genes concentrations"))
x_val <- sort(unique(tmp$SamplingDay2))
tmp %>%
select(SamplingDay2, difference) %>%
group_by(SamplingDay2) %>%
group_split() %>%
map(~ quantile(.x$difference, c(.25, .5, .75), na.rm = TRUE)) %>%
bind_rows() %>%
mutate(x_val = x_val) %>%
with({
points(x_val, `50%`, lwd = 2)
arrows(x_val, `25%`, x_val, `75%`, .1, 90, 3, lwd = 2, col = "black")
lines(x_val, `25%`, lty = 3)
lines(x_val, `50%`, lty = 2)
lines(x_val, `75%`, lty = 3)
})
abline(h = 0)
mtext(antimicrobial)
}
Let’s try it:
plot_differences_quantiles(resistance_genes[1])